
. /* ------------------------------------------------------------------------ ** 
>     C. Wunder, A. Wiencierz, J. Schwarze, H. Küchenhoff:    
>     Well-being over the life span: semiparametric evidence from British 
>     and German longitudinal data
> 
>     Data source:        German Socio-Economic Panel Study (SOEP), years 1986-2007.
>     Data organization:  person-year observations    
>     Description:        Generates and re-codes variables retrieved from the SOEP.
>                         The file "SOEP-1-Retrieval.soep" must be executed 
>                         beforehand.
>     ----------------------------------------------------------------------- 
>         
>     Description of variables:
> 
>     life_sat            life satisfaction (11-point scale)
>     age                 age
>     female              female = 1; male = 0
>     disabled            disability status: disabled = 1; otherwise = 0
>     hospital            number of nights stayed in hospital
>     educ                years of education
>     ln_netto            log of net household income
>     ln_hhsize           log of household size
>     german              German born = 1; otherwise = 0
>     fulltime            full time employed = 1; otherwise = 0
>     parttime            part time employed = 1; otherwise = 0
>     unempl              unemployed = 1; otherwise = 0
>     single              single = 1; otherwise = 0 
>     divorced            divorced = 1; otherwise = 0 
>     widowed             widowed = 1; otherwise = 0 
>     west                West-Germany = 1; otherwise = 0 
>     attr_in_1           attrition: not interviewed 1 year later = 1; otherwise = 0
>     attr_in_2           attrition: not interviewed 2 years later = 1; otherwise = 0
>     attr_in_3           attrition: not interviewed 3 years later = 1; otherwise = 0
>     d_year4-d_year24    wave indicators (years 1990 and 1993 are omitted)   
> ** ------------------------------------------------------------------------ */ 
. version 10

. use "age-long.dta", clear // this file is produced by SOEP-1-Retrieval.soep
(PanelWhiz v2.0 Nov 2007 <john@panelwhiz.eu>)

. mvdecode _all, mv(-3=.a \ -2=.b \ -1=.c)
     hhnrakt: 925248 missing values generated
         sex: 144 missing values generated
     gebjahr: 18744 missing values generated
    erstbefr: 322128 missing values generated
     todjahr: 1390800 missing values generated
     todinfo: 1390728 missing values generated
    letztbef: 322128 missing values generated
    immiyear: 1321080 missing values generated
    germborn: 108432 missing values generated
     corigin: 109560 missing values generated
    gebmonat: 597648 missing values generated
     loc1989: 389976 missing values generated
       netto: 924746 missing values generated
      netold: 925269 missing values generated
     sampreg: 648297 missing values generated
         pop: 930762 missing values generated
       strat: 14352 missing values generated
         psu: 14352 missing values generated
        p80x: 1029 missing values generated
        p81x: 154096 missing values generated
        p83x: 6885 missing values generated
       p776x: 6885 missing values generated
        p84x: 5084 missing values generated
       p171x: 3351 missing values generated
       p493x: 243588 missing values generated
       p497x: 312899 missing values generated
       p195x: 52 missing values generated
       p622x: 1329 missing values generated
       p488x: 558 missing values generated
      p4718x: 56 missing values generated
      p4701x: 6983 missing values generated
      p4702x: 6983 missing values generated
      p2267x: 166307 missing values generated
      p4187x: 2631 missing values generated
      p2292x: 13682 missing values generated
      p2291x: 448 missing values generated
      p4648x: 120523 missing values generated
      h2743x: 25029 missing values generated
      p2123x: 33 missing values generated
      p2124x: 9179 missing values generated
      p2126x: 9705 missing values generated
      p2125x: 1736 missing values generated
        p80e: 17 missing values generated
        p81e: 1052 missing values generated
        p83e: 68 missing values generated
       p776e: 68 missing values generated
        p84e: 40 missing values generated
       p622e: 28 missing values generated
      h2743e: 85 missing values generated
         age: 32558 missing values generated
         yip: 104583 missing values generated

. 
. /*  --------( Coding of variables )---------------------------------------- */
. 
. gen         interview = (netto>=10 & netto<=19)

. sort        persnr year

. by persnr:  gen attr_in_1 = interview[_n+1]==0

. by persnr:  gen attr_in_2 = interview[_n+2]==0 

. by persnr:  gen attr_in_3 = interview[_n+3]==0

. replace     attr_in_1 = 0 if year==2007
(0 real changes made)

. replace     attr_in_2 = 0 if year==2006 | year==2007
(0 real changes made)

. replace     attr_in_3 = 0 if year==2005 | year==2006 | year==2007
(0 real changes made)

. gen         educ=p2292x
(1109073 missing values generated)

. gen         ln_netto = log(h2743x)
(1003898 missing values generated)

. gen         ln_hhsize = log(h2110x)
(927699 missing values generated)

. gen         german = (germborn==1) 

. gen         fulltime =(p4718x==1)

. gen         parttime =(p4718x==2 | p4718x==4)

. gen         fuppes = p171x
(1103850 missing values generated)

. replace     fuppes = p171x-4 if year==1984
(12290 real changes made, 45 to missing)

. gen         unempl =(fuppes==1)

. gen         married =(p2291x==1 | p2291x==2 |p2291x==6)

. gen         single =(p2291x==3)

. gen         divorced =(p2291x==4)

. gen         widowed =(p2291x==5)

. gen         disabled =(p4648x==1)

. gen         hospital = 0

. replace     hospital =p497x if p497x>=1 & p497x<.
(40972 real changes made)

. qui tab     year, gen (d_year)

. gen         female = sex-1
(144 missing values generated)

. qui su      gebjahr

. gen         cohort = gebjahr - r(min)
(18744 missing values generated)

. gen         cohortsq = cohort^2
(18744 missing values generated)

. drop        agesq

. gen         agesq = age^2/10^2
(51302 missing values generated)

. gen         agecub  = age^3/10^3
(51302 missing values generated)

. ren         p622x life_sat 

. 
. /*  --------( Define sample )---------------------------------------------- */           
>     
. xtset       persnr
       panel variable:  persnr (balanced)

. keep        if interview==1
(1095436 observations deleted)

. keep        if educ>=7
(0 observations deleted)

. keep        if h2743x >= 100    
(72 observations deleted)

. drop        if d_year7==1
(13964 observations deleted)

. drop        if d_year10==1 
(13178 observations deleted)

. drop        d_year7 d_year10 

. drop        if yip<=2
(81884 observations deleted)

. drop        if age<18
(6 observations deleted)

. global      xvar ///
>             female      disabled    hospital    educ        ln_netto    ln_hhsize ///
>             german      fulltime    parttime    unempl      single      divorced  ///
>             widowed     west        attr_in_1   attr_in_2   attr_in_3   d_year4   ///
>             d_year5     d_year6     d_year8     d_year9     d_year11    d_year12  ///
>             d_year13    d_year14    d_year15    d_year16    d_year17    d_year18  ///
>             d_year19    d_year20    d_year21    d_year22    d_year23    d_year24  

. tokenize    ${xvar}

. global      gls_xvar ///
>             gls_`1'  gls_`2'  gls_`3'  gls_`4'  gls_`5'  gls_`6'  gls_`7'  gls_`8'  ///
>             gls_`9'  gls_`10' gls_`11' gls_`12' gls_`13' gls_`14' gls_`15' gls_`16' ///
>             gls_`17' gls_`18' gls_`19' gls_`20' gls_`21' gls_`22' gls_`23' gls_`24' ///
>             gls_`25' gls_`26' gls_`27' gls_`28' gls_`29' gls_`30' gls_`31' gls_`32' ///
>             gls_`33' gls_`34' gls_`35' gls_`36' gls_one

. * drop observations with missing values on key variables
. keep persnr year gebjahr life_sat age* cohort* ${xvar}

. local varlist life_sat age cohort ${xvar}

. foreach var of local varlist {
  2.     drop if `var'>=. 
  3. }
(857 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(6580 observations deleted)
(12059 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)

. 
. /*  --------( Define variable labels )------------------------------------- */
. label var attr_in_1 "attrition in 1"

. label var attr_in_2 "attrition in 2"

. label var attr_in_3 "attrition in 3"

. label var age "age"

. label var agesq "age squared/10^2"

. label var agecub "age cubed/10^3"

. label var female "sex: female"

. label var disabled "disability status: disabled" 

. label var hospital "nights stayed in hospital"

. label var educ "years of education"

. label var ln_netto "log of net household income"

. label var ln_hhsize "log of household size"

. label var german "German"

. label var fulltime "full time employed"

. label var parttime "part time employed"

. label var unempl "unemployed"

. label var single "single"

. label var divorced "divorced"

. label var widowed "widowed"

. label var west "West-Germany"

. label var d_year4 "1987"

. label var d_year5 "1988"

. label var d_year6 "1989"

. label var d_year8 "1991"

. label var d_year9 "1992"

. label var d_year11 "1994"

. label var d_year12 "1995"

. label var d_year13 "1996"

. label var d_year14 "1997"

. label var d_year15 "1998"

. label var d_year16 "1999"

. label var d_year17 "2000"

. label var d_year18 "2001"

. label var d_year19 "2002"

. label var d_year20 "2003"

. label var d_year21 "2004"

. label var d_year22 "2005"

. label var d_year23 "2006"

. label var d_year24 "2007"

. 
. save raw.dta, replace
file raw.dta saved

. 
. /*  --------( GLS-transformation )----------------------------------------- */
. use raw.dta, clear
(PanelWhiz v2.0 Nov 2007 <john@panelwhiz.eu>)

. qui xtreg life_sat age agesq agecub ${xvar}, re

. keep if e(sample)
(0 observations deleted)

. scalar sig_e = e(sigma_e)

. scalar sig_u = e(sigma_u)

. gen one=1

. bysort persnr: egen T_i = count(life_sat)

. gen theta_i = 1 - sig_e / (sqrt(sig_e^2 + T_i*sig_u^2)) 

. global all_vars $xvar life_sat age agesq agecub one

. foreach var of global all_vars{
  2.     bysort persnr: egen im_`var' = mean(`var')
  3.     gen gls_`var' = `var' - theta_i*im_`var'
  4. }

. drop im*

. foreach var of global gls_xvar{         
  2.     gen _`var' = `var'
  3. }

. save "SOEP.dta", replace
file SOEP.dta saved

. qui log close
